In [ ]:
# Import the Python libraries we need
import pandas as pd
In [ ]:
# Absolute path to the Stats19 accidents CSV (1979-2004) used by the recipes below.
# The literal is split across two adjacent strings only to keep the lines short.
accidents_data_file = (
    '/Users/robert.dempsey/Dropbox/Private/Art of Skill Hacking/'
    'Books/Python Business Intelligence Cookbook/Data/Stats19-Data1979-2004/Accidents7904.csv'
)
Import the entire accidents dataset
In [ ]:
# Load every row of the accidents CSV into a DataFrame.
#
# Changes from the original call, needed to run on current pandas:
#   * tupleize_cols=False was dropped: the keyword was removed in pandas 1.0
#     and False was already its default.
#   * error_bad_lines=False / warn_bad_lines=True were removed in pandas 2.0;
#     on_bad_lines='warn' (pandas >= 1.3) expresses the same behavior: skip a
#     malformed row and emit a warning for it.
accidents = pd.read_csv(
    accidents_data_file,
    sep=',',
    header=0,               # column names come from the first row
    index_col=False,        # do not use any column as the index
    parse_dates=True,       # per the pandas docs, True means "try parsing the index"
    on_bad_lines='warn',    # skip malformed rows, but report each one
    skip_blank_lines=True,
    low_memory=False,       # parse the file in one piece to avoid mixed-type inference
)
# Last expression of the cell: shows the first rows in the notebook.
accidents.head()
Import the first 1000 rows of the accidents dataset
In [ ]:
# Load only the first 1000 data rows of the accidents CSV, using the first
# column of the file as the DataFrame index.
#
# Changes from the original call, needed to run on current pandas:
#   * tupleize_cols=False was dropped: the keyword was removed in pandas 1.0
#     and False was already its default.
#   * error_bad_lines=True / warn_bad_lines=True were removed in pandas 2.0;
#     on_bad_lines='error' (pandas >= 1.3) keeps the same behavior: a
#     malformed row raises an exception.
accidents = pd.read_csv(
    accidents_data_file,
    sep=',',
    header=0,               # column names come from the first row
    index_col=0,            # first CSV column becomes the index
    parse_dates=True,       # per the pandas docs, True means "try parsing the index"
    on_bad_lines='error',   # fail loudly on a malformed row
    skip_blank_lines=True,
    nrows=1000,             # stop after the first 1000 rows
)
# Last expression of the cell: shows the first rows in the notebook.
accidents.head()
In [ ]:
# Import the Python libraries we need
import pandas as pd
In [ ]:
# Define a variable for the customer data file (an Excel workbook,
# given as a path relative to the current working directory)
customer_data_file = 'data/customer_data.xlsx'
In [ ]:
# Create a DataFrame from the first worksheet of the Excel file.
#
# The original call used the keyword `sheetname`, which pandas renamed to
# `sheet_name`; the old spelling has since been removed, so it is rejected by
# current pandas.
customers = pd.read_excel(
    customer_data_file,
    sheet_name=0,           # first worksheet (zero-based position)
    header=0,               # column names come from the first row
    index_col=False,        # do not use any column as the index
    keep_default_na=True,   # keep pandas' default set of NaN markers
)
# Last expression of the cell: shows the first rows in the notebook.
customers.head()
In [ ]:
# Create a JSON file from the DataFrame for the next recipe.
# orient='records' writes a list with one JSON object per row.
# date_format='iso' writes any datetime values as ISO 8601 strings; without
# it, to_json falls back to epoch milliseconds for this orient, which would
# contradict the next recipe's note that the file stores dates in ISO format.
customers.to_json('data/customer_data.json', orient='records', date_format='iso')
In [ ]:
# Import the Python libraries we need
import pandas as pd
In [ ]:
# Define a variable for our JSON file: the same relative path the previous
# recipe wrote with customers.to_json
customer_json_file = 'data/customer_data.json'
In [ ]:
# Load the customer records from the JSON file into a DataFrame.
# convert_dates=True asks pandas to convert date-like columns to datetimes.
# NOTE(review): the file is expected to hold its dates in ISO format -- confirm
# against how the file was written (to_json emits epoch milliseconds unless
# date_format='iso' is passed).
customers_json = pd.read_json(customer_json_file, convert_dates=True)
# Last expression of the cell: shows the first rows in the notebook.
customers_json.head()
In [ ]: